Calculate the equal-weighted MI for the H3N2 polymerase

Read in data

seq_times <- readRDS("../data/seq_times_091122.rda")

polHA_groups <- seq_times %>%
  select(ID = rowname, group = date)

polHA <- readMSA("../../H3N2_Processing/polha_aa.fasta")

Calculate MI

  • Uncomment code to actually run
  • Warning: for H3N2 pol-HA, takes ~1 hour with ncores = 7
# polHA_MI <- calculateMI(polHA, weighted = TRUE, groups = polHA_groups,
#                      weights = "equal", ncores = 7)
# 
# saveRDS(polHA_MI, "../data/h3n2polHA_equalMI.rds")

Generate Network Viz

  • Use associationsubgraphs package
  • Set default step to “min_max_rule”
  • Use mi_network_info to get readable index names
polHA_MI <- readRDS("../data/h3n2polHA_equalMI.rds")
mi_network_info <- readRDS("../data/mi_network_info.rds")

polHA_MI_IDs <- polHA_MI %>%
  left_join(mi_network_info, by = c("V1" = "index")) %>%
  left_join(mi_network_info, by = c("V2" = "index")) %>%
  filter(z_score > 4) %>%
  select(a = "id.x", b = "id.y", strength = z_score)

# saveRDS(polHA_MI_IDs,"/Users/sarcos/Desktop/Lauring Lab/MI_Networks_App/data/polHA_MI.rds")

visualize_subgraph_structure(
  association_pairs = polHA_MI_IDs,
  node_info = mi_network_info %>% select(-index),
  trim_subgraph_results = FALSE,
  default_step = "min_max_rule"
)
## Calculating subgraph structure results...
## ...finished
## Warning in visualize_subgraph_structure(association_pairs = polHA_MI_IDs, :
## There are 2472 ids in the node_info dataframe that were not seen in association
## pairs. These are omitted.

Tally high-scoring MIp among polymerase subunits

  • “High-scoring” means z-score > 4
polHA_MI_tallys <- polHA_MI_IDs  %>%
  mutate(grouping = paste(a,b, sep = ", ")) %>%
  mutate(grouping = str_remove_all(grouping, pattern = "_[0-9]*")) %>%
  count(grouping) %>%
  arrange(n) %>% 
  mutate(grouping = factor(grouping, levels = unique(grouping)))

ggplot(polHA_MI_tallys, aes(x = grouping, y = n)) +
  geom_col(alpha = 0.7, fill = "grey") +
  theme_classic() +
  labs(title = "wMI pairs within and between polymerase proteins and HA",
       x = "Group", y = "Count") +
  theme(text = element_text(size = 12),
        legend.position = "none")

ggsave("../Figures/mipHA_pairs_all.pdf",
       width = 6.8, height = 3)

polHA_MI_tallys_summary <- polHA_MI_IDs  %>%
  mutate(group1 = paste(a,b, sep = ", ")) %>%
  mutate(group1 = str_remove_all(group1, pattern = "_[0-9]*")) %>%
  mutate(grouping = case_when(
    group1 %in% c("PB1, HA", "PB2, HA", "PA, HA") ~ "Pol-HA",
    group1 == "HA, HA" ~ "HA-HA",
    TRUE ~ "Pol-Pol"
  )) %>%
  count(grouping) %>%
  arrange(n) %>% 
  mutate(grouping = factor(grouping, levels = unique(grouping)))

ggplot(polHA_MI_tallys_summary, aes(x = grouping, y = n)) +
  geom_col(alpha = 0.7, fill = "grey") +
  theme_classic() +
  labs(title = "wMI pairs within and between polymerase proteins and HA",
       x = "Group", y = "Count") +
  theme(text = element_text(size = 12),
        legend.position = "none")

# ggsave("../Figures/mipHA_pairs_summary.pdf",
#        width = 5, height = 3)

Plot amino acid frequencies for top subgraphs with HA

#Subgraph 5
plot_example_freqs(c(194, 338, 569, 2243))

# ggsave("../Figures/freqHA_subgraph5.pdf",
#        width = 4, height = 3)

#Subgraph 9
plot_example_freqs(c(697, 1828, 1859, 2089, 2305))

# ggsave("../Figures/freqHA_subgraph9.pdf",
#        width = 4, height = 3.5)

Plot amino acid frequencies for top subgraphs without HA

#Subgraph 4
plot_example_freqs(c(1378,1468))

# ggsave("../Figures/freqHA_subgraph4.pdf",
#        width = 4, height = 2)

#Subgraph 8
plot_example_freqs(c(811, 1335))

# ggsave("../Figures/freqHA_subgraph8.pdf",
#        width = 4, height = 2)

#Subgraph 10
plot_example_freqs(c(107, 1228, 1866))

# ggsave("../Figures/freqHA_subgraph10.pdf",
#        width = 4, height = 2.5)

Plot distribution of top MIp among groups

polHA_MI_groups <- polHA_MI_IDs  %>%
  mutate(grouping = paste(a,b, sep = ", ")) %>%
  mutate(grouping = str_remove_all(grouping, pattern = "_[0-9]*"))

ggplot(polHA_MI_groups, aes(x = factor(grouping, levels = polHA_MI_tallys$grouping), y = log10(strength))) +
  geom_violin() +
  stat_summary(fun.data = "mean_cl_boot", geom = "pointrange",
               colour = "red") +
  theme_classic() +
  theme(text = element_text(size = 12),
        legend.position = "none")

polHA_MI_summarygroups <- polHA_MI_IDs  %>%
  mutate(group1 = paste(a,b, sep = ", ")) %>%
  mutate(group1 = str_remove_all(group1, pattern = "_[0-9]*")) %>%
    mutate(grouping = case_when(
    group1 %in% c("PB1, HA", "PB2, HA", "PA, HA") ~ "Pol-HA",
    group1 == "HA, HA" ~ "HA-HA",
    TRUE ~ "Pol-Pol"
  ))

ggplot(polHA_MI_summarygroups, aes(x = factor(grouping, levels = polHA_MI_tallys_summary$grouping), y = log10(strength))) +
  geom_violin() +
  stat_summary(fun.data = "mean_cl_boot", geom = "pointrange",
               colour = "red") +
  theme_classic() +
  theme(text = element_text(size = 12),
        legend.position = "none")